#!/usr/bin/env python 
# -*- coding:utf-8 -*-
'''
@File    :   constent.py    
@Modify Time      @Author    @Version    @Description
------------      -------    --------    -----------
2022/4/12 0012 10:30   st      1.0         None
'''

import os

# Whether stop words are used.
STOP_WORD = True

# ############# Constants
# Counting unit - quantity.
NUIT_COUNT_NUM = 1000
# Counting unit - time.
NUIT_COUNT_TIME = 10000

# Connector strings: one for joining phrases, one for joining lines/fields.
CHAR_CONNECTOR = "@_@"
LINE_CONNECTOR = "\t"

# ##############  N-Gram
NGRAM_MINI = 1
NGRAM_MAX = 8
NGRAM_NUM = 6


# Weighting coefficients for the scoring parameters.
# NOTE(review): the four weights below add up to 1.1, not 1.0 -- confirm
# that this is intentional.
# Term frequency.
K_freq = 0.5
# Mutual information (PMI).
K_pmi = 0.15
# Association confidence.
K_conf = 0.15
# Minimum of the left/right entropy.
K_H = 0.3

# Tag of the data set; it is embedded in the model and raw-data directory names.
data_tag = "肺肿瘤"

# ----- File and directory locations
# Directory containing this module. abspath() guards against a relative
# __file__ (possible when the file is executed as a script on older
# interpreters); without it every path below would silently become relative
# to the current working directory.
current_path = os.path.dirname(os.path.abspath(__file__))
# Parent of the module directory; the data directory sits directly under it.
root_path = os.path.dirname(current_path)
data_path = os.path.join(root_path, 'data')

# pkuseg segmentation model directory and user dictionary file.
# Path components are passed separately so that os.path.join picks the
# platform's separator instead of a hard-coded '/'.
pkuseg_model_path = os.path.join(data_path, 'pkuseg', 'models', 'medicine')
pkuseg_user_path = os.path.join(
    data_path, 'pkuseg', 'user_dict', 'medicine', 'medical_word_dict.txt')

# Stop-word list.
stop_word_path = os.path.join(data_path, 'stop_words', 'stopword.txt')

# Model directory; its name encodes the data tag and the maximum n-gram
# length, i.e. "ngram_<data_tag>_<NGRAM_MAX>".
model_dir = os.path.join(
    data_path, 'model', 'ngram_{}_{}'.format(data_tag, NGRAM_MAX))

# Directory holding the raw data for the current data tag.
matadata_dir = os.path.join(data_path, 'matadata', data_tag)
# Raw data file.
matadata_path = os.path.join(matadata_dir, 'matadata.txt')
# Raw data split into sentences.
matadata_path_sentences = os.path.join(matadata_dir, 'matadata_sentences.txt')
# Raw data after word segmentation.
matadata_path_words = os.path.join(matadata_dir, 'matadata_words.txt')
# Word-frequency statistics computed from the segmented raw data.
matadata_path_words_count = os.path.join(matadata_dir, 'matadata_words_count.txt')

# Pretrained-model and lexical-resource directories under the data directory.
bert_model_dir = os.path.join(data_path, 'chinese_L-12_H-768_A-12')

albert_model_dir = os.path.join(data_path, 'albert_tiny')
albert_g_model_dir = os.path.join(data_path, 'albert_tiny_google_zh')

stroke2vec_dir = os.path.join(data_path, 'stroke2vec')

hownet_dir = os.path.join(data_path, 'hownet')

